rm(list = ls())

# # Uncomment and set the working directory with all Harvard database here
# setwd("")


# Preliminary requirements ------------------------------------------------

# libraries
library("pacman") # One library to load (and download if necessary) packages
p_load(tidyverse,ggplot2,ggeffects,estimatr,patchwork,
       modelsummary,magrittr,kableExtra,
       fixest, MatchIt,sf)


# Data --------------------------------------------------------------------

#open individual-level dataset
df <- readRDS(file = "work_democracy_data.rds")

# subset and new dataframe only including those drafted as poll officers
df_mesa <- df |> filter(dataset==2)


# Descriptives ------------------------------------------------------------


#### Table 1: Census tracts by type (# of women as poll officers in the draft) ####
d_t1 <- df_mesa |> group_by(samplewomen,ctid) |> tally()
d_t1 |> group_by(samplewomen) |> tally()

table(df_mesa$samplewomen, df_mesa$female)
prop.table(table(df_mesa$samplewomen, df_mesa$female), margin = 1)

rm(d_t1)

#### Figure 2: Map of poll officers data ####
# Load shapefile of Girona municipalities in 1930s
mapgi36 <- st_read("maps/Girona_munis_1936.shp") |> 
  rename(cmuni = codi_CED)

# Classify municipalities in sample by female presence
df_map <- df_mesa |> 
  group_by(muni,cmuni,ctid,samplewomen) |> 
  summarise() |> # identify whether there was a woman poll officer in precinct
  ungroup() |> 
  group_by(muni,cmuni,samplewomen) |> 
  summarise(num = n()) |> # number of precincts per municipality by poll officer characteristics
  group_by(muni,cmuni) |> 
  mutate(count = n()) |> # count whether municipality has precincts with different characteristics
  ungroup() |> 
  filter(count==1  | count>1 & samplewomen=="Acting Women") |> #keep one obs. per municipality
  mutate(treated = if_else(samplewomen=="Only Men",2,1)) |> # variable to use in map (1: women drafted, 2: randomly selected precincts)
  dplyr::select("muni","cmuni","treated")

# map
dfmap <- left_join(mapgi36,df_map, by="cmuni")|> 
  mutate(treateddummy = factor(if_else(is.na(treated),3,treated),
                               labels = c("Women Poll Officers",
                                          "Randomized Sample",
                                          "Not Analyzed")))
# Final Map
p_load(ggspatial,prettymapr)
fig2 <- ggplot(dfmap)+
  # # Uncomment for map background. It takes some extra time
  # annotation_map_tile(type = "cartolight",
  #                     zoom = 10) + #potentially uncomment this one or decrease zoom (8 is too low)
  geom_sf(aes(fill = treateddummy)) +
  theme(panel.border = element_blank(),
        panel.background = element_blank(),
        plot.background = element_blank(),
        axis.ticks = element_blank(),
        axis.text = element_blank(),
        axis.title = element_blank(),
        legend.position="bottom") +
  scale_fill_grey(start = 0.15, end = .95)+
  labs(fill="") # Some missing municipalities because no individual in the treated or control datasets

fig2

# remove objects
rm(dfmap,df_map,mapgi36)


#### Table 2: Individual-level balance between those drafted as acting and alternate ####

df_mesa1 <- df_mesa  |>  ungroup()  |>  
  mutate(Selected = ifelse(DOGC_acting==1, 'Acting', 'Alternate'),
         treat_status = ifelse(draft_complier_1933=="Draft","Alternate",
                               ifelse(draft_complier_1933=="Poll", "Poll Officer","Defier"))) |> 
  select(Selected,
         `Age`=age,
         `Sex` = female,
         `Literacy (Individual)`=literacy,
         `Occupation`=Hisco1,
         `Voted 1931 (Only Men)`=vot_1931,
         `Voted 1933`=vot_1933,
         `Voted 1936`=vot_1936,
         `Treatment status`=treat_status,
         `(Log) industry (municipality)` = logindust,
         `(Log) Population (municipality)` = logpop,
         `Literacy (municipality)` = literacymuni,
         `% Women married (municipality)` = femalemarried,
         `% Left vote (1933) (census-tract)` = Left_33,
         `% Right vote (1933) (census-tract)` = Right_33)

tab2 <- datasummary_balance(~Selected,
                            output = "kableExtra",
                            data = df_mesa1,
                            fmt = 2,
                            stars = T) |> 
  add_header_above(c(" "=2, "Poll officer draft in 1933" = 4," "=2))
tab2

rm(df_mesa1,summaryname)

#### Table  3: Treatment compliance ####
tab3 <- table(df_mesa$female,df_mesa$draft_complier_1933)
tab3

prop.table(table(df_mesa$female,df_mesa$draft_complier_1933),margin = 1)


# Main Results ------------------------------------------------------------

##### Baseline Results: Figure 3 ####

### Top panel: polling officers vs. not polling officers
## Estimate model
mpred <- lm(vot_1936 ~ mesa33*female + # female negative, interaction non-significant: the gap closes!
              age + vot_1933 +
              logindust + logpop + literacymuni + femalemarried + Left_33 + Right_33,
            data = df_mesa)
  
  # Save estimates
  mydf <- ggpredict(mpred, c("mesa33","female"), ci.lvl = 0.95,)
  mydf2 <- ggpredict(mpred, c("mesa33","female"), ci.lvl = 0.90) |> 
      rename(x2=x, pred2 = predicted,std.error2=std.error,
           conf.low2=conf.low,conf.high2=conf.high, group2=group)
  
  # Combine estimates
  mydf <- cbind(mydf,mydf2) |> 
    mutate(x = factor(mydf$x,
                      levels=c(0,1),
                      labels=c("No\n(n=269)","Yes\n(n=213)")))
  
  # Manually add statistical significance
  mydf <- mydf |> 
    mutate(predtext=case_when(group=="Women" & x2=="0" ~ paste0(round(predicted,digits = 2),"***"),
                              T ~ paste0(round(predicted,digits = 2))))

# Plot Figure 3a
fig3a <- ggplot(mydf, aes(x=x, y = predicted,color=group,label=round(predicted,digits = 2))) +
  geom_point(size = 2,position=position_dodge(0.2)) +
  geom_text(aes(label=predtext),
            position = position_dodge(width = .6),size = 5) +
  geom_pointrange(aes(ymax = conf.high, ymin = conf.low), linewidth=.3, 
                  position=position_dodge(width=0.2)) +
  geom_pointrange(aes(ymax = conf.high2, ymin = conf.low2),linewidth=.9, 
                  position=position_dodge(width=0.2)) +
  theme(panel.background = element_rect(fill = "white", colour = "grey50"), 
        legend.position = "bottom",
        text = element_text(size=20)) +
  scale_color_manual(name="",
                     labels=c("Men",
                              "Women"), values=c("darkgray","black")) +
  scale_y_continuous( "Predicted turnout 1936") +
  scale_x_discrete( "Poll Officer in 1933")

fig3a

#### Bottom panel: polling officers vs. not polling officers
## Estimate model
mpred <- lm(vot_1936 ~ draft_complier_1933*female + age +
              logindust + logpop + literacymuni + femalemarried + Left_33 + Right_33,
            data = df_mesa)

  # Save estimates
  mydf <- ggpredict(mpred, c("draft_complier_1933","female"), ci.lvl = 0.95)
  mydf2 <- ggpredict(mpred, c("draft_complier_1933","female"), ci.lvl = 0.9) |>  
    rename(x2=x, pred2 = predicted,std.error2=std.error,
           conf.low2=conf.low,conf.high2=conf.high, group2=group)

  # Combine
  mydf <- cbind(mydf,mydf2)
  # Change labels
  levels(mydf$x) <- c("Alternate\n(n=213)", "Poll Officer\n(n=213)", "Defier\n(n=56)")
  
  # Manually add statistical significance
  mydf <- mydf |> 
    mutate(predtext=case_when(group=="Women" & x2=="Draft" ~ paste0(round(predicted,digits = 2),"***"),
                              group=="Women" & x2=="Poll" ~ paste0(round(predicted,digits = 2),"**"),
                              T ~ paste0(round(predicted,digits = 2))))

# Plot Figure 3b
fig3b <- ggplot(mydf, aes(x=x, y = predicted,color=group,label=round(predicted,digits = 2))) +
  geom_point(size = 2,position=position_dodge(0.2)) +
  geom_text(aes(label=predtext),
            position = position_dodge(width = .75),size = 5) +
  geom_pointrange(aes(ymax = conf.high, ymin = conf.low), linewidth=.3, 
                  position=position_dodge(width=0.2)) +
  geom_pointrange(aes(ymax = conf.high2, ymin = conf.low2), linewidth=.9, 
                  position=position_dodge(width=0.2)) +
  theme(panel.background = element_rect(fill = "white", colour = "grey50"), 
        legend.position = "bottom",
        text = element_text(size=20)) +
  scale_color_manual(name="",
                     labels=c("Men",
                              "Women"), values=c("darkgray","black")) +
  scale_y_continuous( "Predicted turnout 1936") +
  scale_x_discrete("") 

fig3b

# Remove objects
rm(mpred,mydf,mydf2)

#### Instrumental Variable ####

### First stage estimates
first_stage <- lm(mesa33  ~  DOGC_acting + 
                       female + age + Hisco1+
                       logpop + logindust + literacymuni + Left_33 + turnout_33 + comarca,
                  data = df_mesa)

# Predicted values of being a member of the polling board
df_mesa$predmesa33<-predict(first_stage,newdata =df_mesa)
  # Binary prediction of likelihood to become poll officer
  df_mesa <- df_mesa |> 
    mutate(predmesa33b = if_else(predmesa33<.5,0,1))

### Second stage
## Unadjusted version
second_stage1 <- feols(vot_1936 ~ # Dependent Variable
                         predmesa33*female | # Covariates of interest
                         comarca, # Fixed-Effects
                       cluster = ~ cmuni, data = df_mesa)

## Including more controls
second_stage2 <- feols(vot_1936 ~ # Dependent Variable
                         predmesa33*female + # Covariates of interest
                         vot_1933 + age + Hisco1 + # Controls
                         logindust + logpop + literacymuni + # More controls
                         Left_33 + turnout_33 | # More Controls
                         comarca, # Fixed-Effects
                       cluster = ~ cmuni, data = df_mesa)

# Assign labels to variables
cm <- c("DOGC_acting" = "Draft Acting",
        "predmesa33" = "Pred. Poll Officer",
        "femaleWomen" = "Woman",
        "predmesa33:femaleWomen" = "Pred. Poll Officer x Woman",
        "age"="Age",
        "Occupation"="Hisco1",
        "vot_1933"="Voted 1933",
        "logindust"="(Log) Industry","logpop"="(Log) Population",
        "literacymuni"="Literacy (mun)",
        "Left_33" = "% Left (1933)",
        "turnout_33" = "Turnout (1933)",
        '(Intercept)' = 'Constant')

tab4 <- msummary(list('Stage 1' = first_stage,
              'Stage 2' = second_stage1,
              'Stage 2 (Full)' = second_stage2),
         vcov = ~muni,
         stars = c('*' = .1, '**' = .05, '***' = .01),
         gof_omit = 'AIC|BIC|Log.Lik.|F|Std.Errors|R2 Pseudo|R2 Within|R2 Within Adj.|RMSE',
         notes = list('Standard errors clustered at the municipality-level.',
                      'Comarca FE.',
                      'Control for individual occupation also included.'),
         coef_map=cm,
         output = "kableExtra") |> 
  add_header_above(c(" "=1, "Poll Officer" = 1,"Vote 1936"=2))

tab4

rm(first_stage,second_stage1,second_stage2,cm)

#### Matching ####
# 
# # This is the code we used to match poll officers to individuals from a random sample
# # of voters in the Girona provice. Access to these data is restricted, but we are happy
# # to share access upon request
# 
# # variables to do matching
# df_match <- df |> 
#   mutate(dogc = ifelse(dataset=="2",1,0)) |> 
#   select(dogc, age, literacy,vot_1933,vot_1936,Hisco1,female,
#          logindust,literacymuni,logpop,literacyfemale,femalemarried, femalerate, popconcent,
#          comarca) |> 
#   mutate(comarca = as_factor(comarca)) |> 
#   na.omit()
# 
# # matching process
# mod_match <- matchit(dogc ~ age + literacy + female + Hisco1 +
#                        logindust + literacymuni + logpop + popconcent + 
#                        femalerate + femalemarried,
#                      method = "nearest", # nearest-matching
#                      data = df_match)  
# 
# # matching data to dataset
# df_match1 <- match.data(mod_match) |> 
#   select(c(individualid,weights,subclass))
# 
# # Join matched dataset to complete dataset 
# df_m <- df  |> 
#   left_join(df_match1)  |>  
#   filter(!is.na(weights)) # keep only if matched sample
# 
# rm(df_match,df_match1,mod_match)
  
# For the purposes of reproducibility the database includes
# those individuals who were matched based on the application
# of the previous code
df_m <- df
  
##### Graph including control group based on Matching #####
# Model
mpred <- lm(vot_1936 ~ draft_complier_1933*female + age +
                logindust + logpop + literacymuni + femalemarried + Left_33 + Right_33,
              data = df_m)
  # CI
  mydf <- ggpredict(mpred, c("draft_complier_1933","female"), ci.lvl = 0.95)
  mydf2 <- ggpredict(mpred, c("draft_complier_1933","female"), ci.lvl = 0.9) |>  
    rename(x2=x, pred2 = predicted,std.error2=std.error,
           conf.low2=conf.low,conf.high2=conf.high, group2=group)
  
  mydf <- cbind(mydf,mydf2)
  
  # Change labels
  levels(mydf$x) <- c("Control\n(matching)", "Alternate", "Poll \nOfficer", "Defier")
  
  # Graph
fig4 <- ggplot(mydf, aes(x=x, y = predicted,color=group,label = round(predicted,digits = 2))) +
    geom_point(size = 2,position=position_dodge(0.2)) +
    geom_text(position = position_dodge(width = .7), size = 5) +
    geom_pointrange(aes(ymax = conf.high, ymin = conf.low),linewidth=.3, 
                    position=position_dodge(width=0.2)) +
    geom_pointrange(aes(ymax = conf.high2, ymin = conf.low2),linewidth=.9, 
                    position=position_dodge(width=0.2)) +
    theme(panel.background = element_rect(fill = "white", colour = "grey50"), 
          legend.position = "bottom",
          text = element_text(size=20)) +
    scale_color_manual(name="",
                       labels=c("Men",
                                "Women"), values=c("darkgray","black")) +
    scale_y_continuous( "Predicted turnout 1936") +
    scale_x_discrete( "") 

fig4

# Remove objects 
rm(mpred,mydf,mydf2)


# Mechanisms --------------------------------------------------------------

##### Mobilization #####
# Figure define letters instead of dots
uniqueInitials <- c("M","W")
initialShapes <- unlist(lapply(uniqueInitials,utf8ToInt))

## Figure of mobilization 
df_mesa <- df_mesa |> 
  mutate(predmesa33b = factor(predmesa33b, # Predicted poll officer as factor
                              levels=c(0,1),
                              labels=c("No", "Yes")),
         # CNT as factor
         cnt_dummy = factor(cnt_dummy,
                            levels = c(0,1),
                            labels = c("Anarchist = No", "Anarchist = Yes")),
         # Lliga rallies as factor
         rally_lliga = factor(rally_lliga,
                               levels = c(0,1),
                               labels = c("Lliga (right) = No",
                                          "Lliga (right) = Yes")),
         # Ateneu presence as factor
         ateneu_dummy = factor(ateneu_dummy,
                                levels = c(0,1),
                                labels = c("Ateneu (left) = No",
                                           "Ateneu (left) = Yes"))
         )

### CNT Heterogeneous effects
# Model
regcnt <- lm(vot_1936 ~ predmesa33b*female*cnt_dummy + age + vot_1933 +
             logindust + logpop + literacymuni + femalemarried + parity,
             data = df_mesa)

# Predicted values
pred <- ggpredict(regcnt, c("predmesa33b","female","cnt_dummy"), ci.lvl = 0.9)

# Graph
fig5a_cnt <- pred |> 
  ggplot() +
    geom_point(aes(y=predicted,x=x,shape=group,color=group),
               position=position_dodge(0.4),
               size = 4)+
    geom_linerange(aes(y=predicted,x=x,color=group,
                        ymax = conf.high, ymin = conf.low),
                   linewidth=.4, alpha=.7,
                   position=position_dodge(width=.4)) +
    scale_shape_manual(values=initialShapes)+
    scale_color_manual(values=c("gray45","black"))+
    facet_grid(cols = vars(facet))+
    ylab("Pred. Turnout (1936)") + ylim(.35,1.05)+
    xlab("") +
    theme_minimal() +
    theme(legend.title = element_blank(),
          panel.grid.major.x = element_blank(),
          panel.grid = element_line("gray95")) 

fig5a_cnt

## Simpler alternative
# gcnt <-plot(ggpredict(regcnt, c("predmesa33b","female","cnt_dummy"), ci.lvl = 0.9),
#              show.title = F , colors =c("darkgray","black")) +
#   ylab("Pred. Turnout (1936)") + ylim(.35,1.05)+
#   xlab("Predicted Polling Officer") +
#   # ylab(element_blank()) + xlab(element_blank())+
#   theme(legend.title = element_blank()) + theme_minimal()
# gcnt

### Lliga Heterogeneous effects
# Estimates
reglliga <- lm(vot_1936 ~ predmesa33b*female*rally_lliga + age + vot_1933 +
                 logindust + logpop + literacymuni + femalemarried + parity,
               data = df_mesa) 

# Predicted values
pred <- ggpredict(reglliga, c("predmesa33b","female","rally_lliga"), ci.lvl = 0.9)

# Graph
fig5b_lliga <- pred |> 
  ggplot()+
  geom_point(aes(y=predicted,x=x,shape=group,color=group),
             position=position_dodge(0.4),
             size = 4)+
  geom_linerange(aes(y=predicted,x=x,color=group,
                     ymax = conf.high, ymin = conf.low),
                 linewidth=.4, alpha=.7,
                 position=position_dodge(width=.4)) +
  scale_shape_manual(values=initialShapes)+
  scale_color_manual(values=c("gray45","black"))+
  facet_grid(cols = vars(facet))+
  ylab("") + ylim(.35,1.05)+
  xlab("") +
  theme_minimal() +
  theme(legend.title = element_blank(),
        panel.grid.major.x = element_blank(),
        panel.grid = element_line("gray95")) 

fig5b_lliga

# # Simpler alternative
# glliga <- plot(ggpredict(reglliga, c("predmesa33b","female","rally_lliga"), ci.lvl = 0.9), 
#                show.title = F, colors =c("darkgray","black")) +
#   ylab("Pred. Turnout (1936)") + ylim(.35,1.05)+
#   xlab("Predicted Polling Officer") +
#   # ylab(element_blank()) + xlab(element_blank())+
#   theme(legend.title = element_blank()) + theme_minimal()
# glliga

### Ateneu Heterogeneous effects
# Estimate
regateneu <- lm(vot_1936 ~ predmesa33b*female*ateneu_dummy + age + vot_1933 +
                  logindust + logpop + literacymuni + femalemarried + parity,
                data = df_mesa)

# Predicted values
pred <- ggpredict(regateneu, c("predmesa33b","female","ateneu_dummy"), ci.lvl = 0.9)

# Graph
fig5c_ateneu <- pred |> 
  ggplot()+
  geom_point(aes(y=predicted,x=x,shape=group,color=group),
             position=position_dodge(0.4),
             size = 4)+
  geom_linerange(aes(y=predicted,x=x,color=group,
                     ymax = conf.high, ymin = conf.low),
                 linewidth=.4, alpha=.7,
                 position=position_dodge(width=.4)) +
  scale_shape_manual(values=initialShapes)+
  scale_color_manual(values=c("gray45","black"))+
  facet_grid(cols = vars(facet))+
  ylab("Pred. Turnout (1936)") + ylim(.35,1.05)+
  xlab("Predicted Poll Officer") +
  theme_minimal() +
  theme(legend.title = element_blank(),
        panel.grid.major.x = element_blank(),
        panel.grid = element_line("gray95")) 

fig5c_ateneu

# # Simpler alternative
# gateneu <- plot(ggpredict(regateneu, c("predmesa33b","female","ateneu_dummy"), ci.lvl = 0.9), 
#                 show.title = F, colors =c("darkgray","black")) +
#   ylab("Pred. Turnout (1936)") + ylim(.35,1.05)+
#   xlab("Predicted Polling Officer") +
#   # ylab(element_blank()) + xlab(element_blank())+
#   theme(legend.title = element_blank())+ theme_minimal()
# 
# gateneu

### Catholic Associations Heterogeneous Effects
# Estimates
regcath <- lm(vot_1936 ~ predmesa33b*female*cath_ass_percent33 + age + vot_1933 +
                logindust + logpop + literacymuni + femalemarried + parity,
              data = df_mesa)

# Predicted values
pred <- ggpredict(regcath, c("predmesa33b","female","cath_ass_percent33  [0,30]"), ci.lvl = 0.9) |> 
  mutate(facet1 = if_else(facet==0,"0% Cath. Assoc.","30% Cath. Assoc."))  |> as.data.frame() |>
  select(-facet) |> rename(facet=facet1)

# Graph
fig5d_cath <- pred |> 
  ggplot()+
  geom_point(aes(y=predicted,x=x,shape=group,color=group),
             position=position_dodge(0.4),
             size = 4)+
  geom_linerange(aes(y=predicted,x=x,color=group,
                     ymax = conf.high, ymin = conf.low),
                 linewidth=.4, alpha=.7,
                 position=position_dodge(width=.4)) +
  scale_shape_manual(values=initialShapes)+
  scale_color_manual(values=c("gray45","black"))+
  facet_grid(cols = vars(facet))+
  ylab("") + ylim(.35,1.05)+
  xlab("Predicted Poll Officer") +
  theme_minimal() +
  theme(legend.title = element_blank(),
        panel.grid.major.x = element_blank(),
        panel.grid = element_line("gray95")) 

fig5d_cath

# # Simpler alternative
# gcath <- plot(ggpredict(regcath, c("predmesa33b","female","cath_ass_percent33  [0,30]"),
#                         ci.lvl = 0.9),show.title = F, colors =c("darkgray","black")) +
#   ylab("Pred. Turnout (1936)") + ylim(.35,1.05)+
#   xlab("Predicted Polling Officer") +
#   # ylab(element_blank()) + xlab(element_blank())+
#   theme(legend.title = element_blank()) + theme_minimal()
# 
# cath_lab <- as_labeller(c("cath_ass_percent33 = 0" = "0% Cath. Assoc.",
#                           # "cath_ass_percent33 = 15" = "15% Cath. Assoc.",
#                           "cath_ass_percent33 = 30" = "30% Cath. Assoc."))
# 
# gcath<- gcath + facet_grid(cols = vars(facet),
#                            labeller = labeller(facet = cath_lab))
# gcath

# Combination graphs
fig5 <- fig5a_cnt + fig5b_lliga + fig5c_ateneu + fig5d_cath +
  plot_layout(guides = "collect") +
  labs(caption = "90% Confidence Intervals") & theme(legend.position = "bottom",
                                                   legend.title = element_blank()) 

fig5

# Remove objects
rm(pred,initialShapes,uniqueInitials,
   regateneu,regcath,regcnt,reglliga)


##### Peer-effects #####
# Create new df
df_peer <- df_m |>
  # Dummy if there is a woman poll officer 
  mutate(FemPollWork = if_else(NumFemPollWork>0,1,0))
# FemPollWork as factor
df_peer$FemPollWork <- factor(df_peer$FemPollWork,
                              levels = c(0,1),
                              labels = c("All Male Poll Officers",
                                         "Women Poll Officer"))
# Drop all defiers
df_peer <- df_peer  |> 
  filter(draft_complier_1933!="Defier")

# Change labels of variable
levels(df_peer$draft_complier_1933) <- c("Control\n(matching)","Draft","Poll\nOfficer","Defier")

# Estimate model
fempoll <-feols(vot_1936 ~ FemPollWork*female*draft_complier_1933 + age | cmuni,
            data = df_peer)
summary(fempoll)

# Predicted values
pred_fempoll <- ggpredict(fempoll, c("draft_complier_1933","female","FemPollWork"), ci.lvl = 0.9)

# Drop estimates for the category women in All male poll officers
pred_fempoll[11,2] <- NA
pred_fempoll[11,4] <- NA
pred_fempoll[11,5] <- NA

# Graph
fig6 <- plot(pred_fempoll , show.title = F) +
  theme_minimal() + scale_color_manual(name="", values=c("darkgray","black")) +
  ylab("Predicted Turnout (1936)") + xlab(element_blank()) + 
  labs(title = "", caption = '90% CI \nMunicipality FE')+
  theme(legend.position="bottom")

fig6

rm(df_peer,fempoll,pred_fempoll)

##### Social Proximity #####
df_m <- df_m |> 
  # Dummy if there is a woman poll officer 
  mutate(FemPollWork = if_else(NumFemPollWork>0,1,0))
# FemPollWork as factor
df_m$FemPollWork <- factor(df_m$FemPollWork,
                              levels = c(0,1),
                              labels = c("All Male Poll Officers",
                                         "Women Poll Officer"))

# Estimates
x3popcon <- lm(vot_1936 ~ FemPollWork*female*popconcent + age + comarca,
               data = df_m)


# Predict values
x3 <- ggpredict(x3popcon, c("popconcent","female","FemPollWork"), ci.lvl = 0.9) 

# To show histogram of population concentration by precinct
df_prec <- df_m |> ungroup() |> 
  select(ctid,FemPollWork,popconcent) |> 
  distinct() |> 
  drop_na() |> 
  rename(x = popconcent,
         facet = FemPollWork) |> 
  select(-ctid)

# Graph
fig7 <- ggplot()+
  geom_point(data = x3,aes(x = x, y = predicted,
                           group = group, color = group),
             position=position_dodge(width=.1))+
  geom_linerange(data = x3,aes(x = x, y = predicted,
                               ymin = conf.low, ymax = conf.high,
                               group = group, color = group),
                 position=position_dodge(width=.1))+
  geom_histogram(data=df_prec, aes(x = x,
                                   y = after_stat(count / sum(count))))+
  scale_x_continuous(breaks = c(0,.2,.4,.6,.8,1))+
  facet_grid(cols = vars(facet))+
  scale_color_manual(name="", values=c("darkgray","black")) +
  scale_fill_manual(name="", values=c("darkgray","black")) +
  ylab("Predicted Turnout (1936)") +
  xlab("Population Concentration") +
  theme_minimal()+
  theme(panel.grid.minor = element_blank(),
        legend.title = element_blank(),
        legend.position="bottom")

fig7

rm(x3,x3popcon)


# End of script -----------------------------------------------------------
rm(list = ls())
